{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers.models.qwen2 import Qwen2ForCausalLM, Qwen2Tokenizer\n",
    "import torch\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_name_or_path = \"/home/yuanz/documents/models/Qwen/Qwen2.5-0.5B\"\n",
    "model = Qwen2ForCausalLM.from_pretrained(model_name_or_path, device_map=\"cuda:0\")\n",
    "tokenizer = Qwen2Tokenizer.from_pretrained(model_name_or_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## demo1\n",
    "1. 调用generate方法，让他一直生成新的token"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input_ids tensor([[109432, 104130,   9370,  99584, 103852,  45995]], device='cuda:0')\n",
      "attention_mask tensor([[1, 1, 1, 1, 1, 1]], device='cuda:0')\n"
     ]
    }
   ],
   "source": [
    "text = \"介绍一下杭州的良睦路\"\n",
    "\n",
    "model_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\n",
    "for k, v in model_inputs.items():\n",
    "    print(k, v)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'，以及它与周边地区的联系。\\n良睦'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "generated_ids = model.generate(**model_inputs, max_new_tokens=10)\n",
    "generated_ids = [\n",
    "    output_ids[len(input_ids) :]\n",
    "    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n",
    "]\n",
    "\n",
    "response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
    "response\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## demo2\n",
    "1. 直接基于第一步，生成新token"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'input_ids': tensor([[109432, 104130,   9370,  99584, 103852,  45995]], device='cuda:0'),\n",
       " 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]], device='cuda:0')}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_inputs1 = {\n",
    "    \"input_ids\": torch.tensor(\n",
    "        [[109432, 104130, 9370, 99584, 103852, 45995]], dtype=torch.long\n",
    "    ).to(model.device),\n",
    "    \"attention_mask\": torch.tensor([[1, 1, 1, 1, 1, 1]], dtype=torch.long).to(\n",
    "        model.device\n",
    "    ),\n",
    "}\n",
    "\n",
    "model_inputs1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "odict_keys(['logits', 'past_key_values'])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_outputs1 = model.forward(**model_inputs1,use_cache=True) # model(**model_inputs1)\n",
    "model_outputs1.keys()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 6, 151936])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_outputs1.logits.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 151936])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_outputs1.logits[:, -1, :].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([3837], device='cuda:0')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_outputs1.logits[:, -1, :].argmax(dim=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'，'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.decode([3837])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tuple, 24, 2, torch.Size([1, 2, 6, 64]))"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(\n",
    "    type(model_outputs1.past_key_values),\n",
    "    # isinstance(model_outputs1.past_key_values, torch.nn.Module),\n",
    "    len(model_outputs1.past_key_values),\n",
    "    len(model_outputs1.past_key_values[0]),\n",
    "    model_outputs1.past_key_values[0][0].shape,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## demo3\n",
    "1. 把上一次生成的past kv 拿过来，加上新拼接的token，生成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_cache#legacy-cache-format)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "odict_keys(['logits', 'past_key_values'])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_outputs2 = model.forward(\n",
    "    **{\n",
    "        \"input_ids\": torch.tensor([[3837]], dtype=torch.long).to(model.device),\n",
    "        \"attention_mask\": torch.tensor([[1]], dtype=torch.long).to(model.device),\n",
    "    },\n",
    "    past_key_values=model_outputs1.past_key_values,\n",
    ")\n",
    "model_outputs2.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## demo4\n",
    "1. 直接模拟简单粗暴类型的生成方式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'input_ids': tensor([[109432, 104130,   9370,  99584, 103852,  45995,   3837]],\n",
       "        device='cuda:0'),\n",
       " 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_inputs3 = {\n",
    "    \"input_ids\": torch.tensor(\n",
    "        [[109432, 104130, 9370, 99584, 103852, 45995, 3837]], dtype=torch.long\n",
    "    ).to(model.device),\n",
    "    \"attention_mask\": torch.tensor([[1, 1, 1, 1, 1, 1, 1]], dtype=torch.long).to(\n",
    "        model.device\n",
    "    ),\n",
    "}\n",
    "\n",
    "model_inputs3\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "odict_keys(['logits', 'past_key_values'])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_outputs3 = model.forward(**model_inputs3) # model(**model_inputs3)\n",
    "model_outputs3.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 验证demo4和demo3输出的logits是不是一样的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.allclose(model_outputs3.logits[:, -1, :], model_outputs2.logits[:, -1, :],atol=1e-4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([101034], device='cuda:0')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_outputs3.logits[:, -1, :].argmax(dim=-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# layer的各个模块\n",
    "\n",
    "## 1. embedding模块\n",
    "\n",
    "1. `[batch_size, seq_len] -> [batch_size, seq_len, hidden_size]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(torch.Size([1, 7]), torch.Size([1, 7, 896]))"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_embedding = model.model.embed_tokens(model_inputs3.get(\"input_ids\"))\n",
    "\n",
    "(model_inputs3.get(\"input_ids\").shape, input_embedding.shape)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. layernorm模块\n",
    "\n",
    "1. 这里主要是input norm、output norm\n",
    "\n",
    "2. `[batch_size, seq_len, hidden_size] -> [batch_size, seq_len, hidden_size]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 7, 896])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "input_layernorm_value = model.model.layers[0].input_layernorm(input_embedding)\n",
    "input_layernorm_value.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. attention 层\n",
    "\n",
    "1. `[batch_size, seq_len, hidden_size] -> [batch_size, seq_len, hidden_size]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show_attention.py "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. mlp层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(torch.Size([1, 7, 896]), torch.Size([1, 7, 896]))"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(\n",
    "    input_embedding.shape,\n",
    "    model.model.layers[0].mlp(input_embedding).shape\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "list assignment index out of range",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m a \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m----> 2\u001b[0m a[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m3\u001b[39m\n\u001b[1;32m      3\u001b[0m a\n",
      "\u001b[0;31mIndexError\u001b[0m: list assignment index out of range"
     ]
    }
   ],
   "source": [
    "a = [1]\n",
    "a[1] = 3\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev_1027",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
